creating ranks

the ranks will be a named num, with entrezgene_id as name and stat (Wald Test) as metric

#' Build the ranked gene statistic vector used as `stats` input for fgsea
#'
#' @param res Differential expression result that `as.data.frame()` turns into
#'   a table with the gene ids as row names and a `stat` column.
#' @param annot Annotation table with the columns `GeneID` and `entrezgene_id`.
#' @return Named numeric vector of `stat` values, named by `entrezgene_id` and
#'   sorted in decreasing order. Genes without an `entrezgene_id` or without a
#'   `stat` value are left out.
getRanks <- function(res, annot) {
  # only taking genes which have entrezgene_ids assigned to them
  genes_with_entrez <- annot %>%
    dplyr::select(GeneID, entrezgene_id) %>%
    dplyr::filter(!is.na(entrezgene_id))

  as.data.frame(res) %>%
    tibble::rownames_to_column("GeneID") %>%
    merge(genes_with_entrez, by = "GeneID") %>%
    # a gene without a test statistic carries no ranking information and
    # would put a non-finite value into the fgsea input
    dplyr::filter(!is.na(stat)) %>%
    dplyr::arrange(dplyr::desc(stat)) %>%
    dplyr::select(entrezgene_id, stat) %>%
    tibble::deframe() # creating a named num from two columns
}

ranks.gastroc <- getRanks(res.gastroc, annot)
ranks.soleus <- getRanks(res.soleus, annot)
# TODO: why (again) is the soleus gene count seemingly 300 below gastroc gene count / ranks count
# -> seems because of e.g. the cutoff at DESeq

ranks distribution

duplicate entrezgene_ids

duplicate entrezgene ids (multiple entrez are mapped to the same gene name)

Thus a quick look if any of these genes can be simply omitted. Like if the gene_type is “other” or “tRNA”

Since all ENSEMBL duplicates are also found in the gene_name duplicates, using the ENSEMBL as id for the ranks would reduce the number of duplicates by 107.

actually most of them are “protein coding” and will not be omitted. to be continued …

loading pathways

Pathways are provided by http://www.gsea-msigdb.org/gsea/msigdb/mouse/collections.jsp

For now the canonical pathways (CP) are used. Each of these gene sets represents a biological process. They are composed from the following databases, taking a subset of CP:

database gene sets
BioCarta 252
Reactome 1249
WikiPathways 186

applying fgsea

fgseaRes <- fgsea(
  pathways = CGP,
  stats    = ranks,
  minSize  = 15,
  maxSize  = 200
)

Enrichment score plot

ordering pathways by padj values and using the sign of ES to tell up- from down-regulated pathways

#' Obtain the top pathways ordered by `padj`; the sign of `ES` decides whether
#' a pathway counts as up- or down-regulated
#'
#' @param fgseaRes fgsea result table with the columns `ES` and `padj`.
#' @param up If `TRUE` keep pathways with `ES > 0`, otherwise those with
#'   `ES < 0`.
#' @param pCutoff Only pathways with `padj < pCutoff` are kept.
#' @param n Maximum number of pathways to return.
#' @return The at most `n` rows of `fgseaRes` that pass both filters, sorted by
#'   ascending `padj`.
get_top_pathways <- function(fgseaRes, up = TRUE, pCutoff=params$pCutoff, n=10) {
  # `up` is a single flag, so a plain if/else picks the comparison operator
  .updown <- if (up) `>` else `<`

  fgseaRes %>%
    filter(.updown(ES, 0), padj < pCutoff) %>%
    arrange(padj) %>%
    slice_head(n = n)
}

plot for top up and down regulated pathways

#' Plot the enrichment curves of the top `n` pathways of an fgsea result
#'
#' @param fgseaRes fgsea result table, passed on to `get_top_pathways()`.
#' @param pathways Named list of gene sets; entries are looked up by pathway
#'   name.
#' @param ranks Named numeric vector of gene statistics, passed to
#'   `plotEnrichment()`.
#' @param n Number of pathways to plot.
#' @param up If `TRUE` plot up-regulated pathways, otherwise down-regulated
#'   ones.
#' @return The arranged figure from `arrange_plts()`, or `NULL` (invisibly)
#'   when no pathway passes `params$pCutoff`.
plot_top_enrichment <- function(fgseaRes, pathways, ranks, n = 9, up = TRUE) {
  # extracting the top n pathways
  top.pathways <- get_top_pathways(fgseaRes, up=up, pCutoff=params$pCutoff, n=n)
  # take the names as a plain vector: `top.pathways[i]` selects a row only for
  # a data.table, for a data.frame it would select a column
  pathway.names <- top.pathways$pathway

  # without this guard an empty result would still be "plotted" once
  if (length(pathway.names) == 0) {
    message("no pathway passes the cutoff, nothing to plot")
    return(invisible(NULL))
  }

  # one enrichment plot per pathway
  # TODO: how can I use facet_wrap for this?
  plot.list <- lapply(pathway.names, function(pathway) {
    plotEnrichment(pathways[[pathway]], ranks) +
      # TODO: adjust yaxis to the same scale
      # TODO: keep axis.text.x only on the lower row
      # TODO: keep axis.text.y only on the right column
      theme(
        axis.title.x = element_blank(),
        axis.title.y = element_blank()
      )
  })

  arrange_plts(plot.list)
}

#' Helper function to arrange the enrichment plots into one labelled figure
#'
#' @param plt.list List of ggplot objects; the shared axis labels are read
#'   from the first one.
#' @return The figure built by `ggpubr::ggarrange()` and
#'   `ggpubr::annotate_figure()`: panels labelled A, B, ... with one common
#'   x- and one common y-axis label.
arrange_plts <- function(plt.list) {
  # `[[` extracts the plot itself; `[` returns a one-element list whose
  # `$labels` is NULL, which left the shared axis labels empty
  first.plt <- if (length(plt.list) > 0) plt.list[[1]] else NULL
  xlab <- first.plt$labels$x
  ylab <- first.plt$labels$y

  # TODO: set axis to the same scale
  # TODO: remove all x-axis labels except lower row

  # seq_along() avoids the c(1, 0) index that 1:0 produces for an empty list
  fig_labels <- LETTERS[seq_along(plt.list)]

  ggpubr::ggarrange(plotlist = plt.list, labels = fig_labels) %>%
    ggpubr::annotate_figure(
      left = ggpubr::text_grob(ylab, rot = 90),
      bottom = ggpubr::text_grob(xlab)
    )
}



# plot_labels <-
#     data.frame("label" = LETTERS[1:10], "pathway" = top.pathways)
# knitr::kable(caption = "plot labels", plot_labels)

gastroc up

plot_top_enrichment(fgseaRes.gastroc, CGP, ranks.gastroc, up=T)


# TODO: add plot labels to return argument of plot_top_enrichment (use list probably)
plot_labels <-
    data.frame("label" = LETTERS[1:9], "pathway" = get_top_pathways(fgseaRes.gastroc, up=T, n=9)$pathway)
knitr::kable(caption = "plot labels", plot_labels)
plot labels
label pathway
A WP_TYROBP_CAUSAL_NETWORK_IN_MICROGLIA
B WP_MICROGLIA_PATHOGEN_PHAGOCYTOSIS_PATHWAY
C WP_APOPTOSIS
D REACTOME_IMMUNOREGULATORY_INTERACTIONS_BETWEEN_A_LYMPHOID_AND_A_NON_LYMPHOID_CELL
E WP_FIBRIN_COMPLEMENT_RECEPTOR_3_SIGNALING_PATHWAY
F WP_CHEMOKINE_SIGNALING_PATHWAY
G BIOCARTA_TNFR2_PATHWAY
H REACTOME_DAP12_INTERACTIONS
I REACTOME_FCGAMMA_RECEPTOR_FCGR_DEPENDENT_PHAGOCYTOSIS

gastroc down

plot_top_enrichment(fgseaRes.gastroc, CGP, ranks.gastroc, up=F)

plot_labels <-
    data.frame("label" = LETTERS[1:9], "pathway" = get_top_pathways(fgseaRes.gastroc, up=F, n=9)$pathway)
knitr::kable(caption = "plot labels", plot_labels)
plot labels
label pathway
A REACTOME_THE_CITRIC_ACID_TCA_CYCLE_AND_RESPIRATORY_ELECTRON_TRANSPORT
B REACTOME_RESPIRATORY_ELECTRON_TRANSPORT_ATP_SYNTHESIS_BY_CHEMIOSMOTIC_COUPLING_AND_HEAT_PRODUCTION_BY_UNCOUPLING_PROTEINS
C REACTOME_RESPIRATORY_ELECTRON_TRANSPORT
D WP_ELECTRON_TRANSPORT_CHAIN
E REACTOME_COMPLEX_I_BIOGENESIS
F REACTOME_MITOCHONDRIAL_TRANSLATION
G REACTOME_KEAP1_NFE2L2_PATHWAY
H REACTOME_CELLULAR_RESPONSE_TO_HYPOXIA
I REACTOME_CELLULAR_RESPONSE_TO_CHEMICAL_STRESS

soleus up

plot_top_enrichment(fgseaRes.soleus, CGP, ranks.soleus, up=T)


plot_labels <-
    data.frame("label" = LETTERS[1:9], "pathway" = get_top_pathways(fgseaRes.soleus, up=T, n=9)$pathway)
knitr::kable(caption = "plot labels", plot_labels)
plot labels
label pathway
A REACTOME_SRP_DEPENDENT_COTRANSLATIONAL_PROTEIN_TARGETING_TO_MEMBRANE
B REACTOME_FORMATION_OF_A_POOL_OF_FREE_40S_SUBUNITS
C REACTOME_NONSENSE_MEDIATED_DECAY_NMD_INDEPENDENT_OF_THE_EXON_JUNCTION_COMPLEX_EJC
D WP_CYTOPLASMIC_RIBOSOMAL_PROTEINS
E WP_TYROBP_CAUSAL_NETWORK_IN_MICROGLIA
F REACTOME_EUKARYOTIC_TRANSLATION_INITIATION
G REACTOME_NONSENSE_MEDIATED_DECAY_NMD
H REACTOME_MAJOR_PATHWAY_OF_RRNA_PROCESSING_IN_THE_NUCLEOLUS_AND_CYTOSOL
I REACTOME_PRC2_METHYLATES_HISTONES_AND_DNA

soleus down

plot_top_enrichment(fgseaRes.soleus, CGP, ranks.soleus, up=F)

plot_labels <-
    data.frame("label" = LETTERS[1:9], "pathway" = get_top_pathways(fgseaRes.soleus, up=F, n=9)$pathway)
knitr::kable(caption = "plot labels", plot_labels)
plot labels
label pathway
A REACTOME_KEAP1_NFE2L2_PATHWAY
B REACTOME_CELLULAR_RESPONSE_TO_CHEMICAL_STRESS
C REACTOME_GLI3_IS_PROCESSED_TO_GLI3R_BY_THE_PROTEASOME
D REACTOME_UBIQUITIN_MEDIATED_DEGRADATION_OF_PHOSPHORYLATED_CDC25A
E REACTOME_RUNX1_REGULATES_TRANSCRIPTION_OF_GENES_INVOLVED_IN_DIFFERENTIATION_OF_HSCS
F REACTOME_CELLULAR_RESPONSE_TO_HYPOXIA
G REACTOME_DEGRADATION_OF_DVL
H REACTOME_ABC_FAMILY_PROTEINS_MEDIATED_TRANSPORT
I REACTOME_ASYMMETRIC_LOCALIZATION_OF_PCP_PROTEINS

GSEA table plot

gastroc

top significant pathways:

# creating up and down regulated pathway vectors separately to maintain order

topUp <- get_top_pathways(fgseaRes.gastroc, up=T, pCutoff = params$pCutoff, n=10)
topDown <- get_top_pathways(fgseaRes.gastroc, up=F, pCutoff = params$pCutoff, n=10)
topPathways <- bind_rows(topUp, topDown) %>%
  arrange(-NES) %>%
  pull(pathway)

plotGseaTable(
  pathways = CGP[topPathways],
  stats = ranks.gastroc,
  fgseaRes = fgseaRes.gastroc,
  gseaParam = 0.5,
  render = TRUE
) %>%
  ggpubr::as_ggplot() # needed since, for whatever reason only `NULL` gets returned if `plotGseaTable` is rendered inline

soleus

top significant pathways:

# creating up and down regulated pathway vectors separately to maintain order

topUp <- get_top_pathways(fgseaRes.soleus, up=T, pCutoff = params$pCutoff, n=10)
topDown <- get_top_pathways(fgseaRes.soleus, up=F, pCutoff = params$pCutoff, n=10)
topPathways <- bind_rows(topUp, topDown) %>%
  arrange(-NES) %>%
  pull(pathway)

plotGseaTable(
  pathways = CGP[topPathways],
  stats = ranks.soleus,
  fgseaRes = fgseaRes.soleus,
  gseaParam = 0.5,
  render = TRUE
) %>%
  ggpubr::as_ggplot() # needed since, for whatever reason only `NULL` gets returned if `plotGseaTable` is rendered inline

most differential regulated pathways, both tissues

using NES from the fgsea result and keeping pathways with padj below pCutoff (0.01) in at least one tissue yields the following plot:


pCutoff <- params$pCutoff
fgseaRes.combined <- merge(
  data.frame(fgseaRes.gastroc[, c("pathway", "NES", "padj")]),
  data.frame(fgseaRes.soleus[, c("pathway", "NES", "padj")]),
  by = "pathway",
  suffixes = c(".ga", ".sol")
) %>%
  filter(padj.ga < pCutoff | padj.sol < pCutoff) %>%
  mutate(
    diff.exp = case_when(
      NES.ga  < 0 & NES.sol < 0 & padj.ga < pCutoff & padj.sol < pCutoff ~ "both down",
      NES.ga  > 0 & NES.sol > 0 & padj.ga < pCutoff & padj.sol < pCutoff ~ "both up",
      NES.ga  < 0 & NES.sol > 0 & padj.ga < pCutoff & padj.sol < pCutoff ~ "ga down, sol up",
      NES.ga  > 0 & NES.sol < 0 & padj.ga < pCutoff & padj.sol < pCutoff ~ "ga up, sol down",
                                  padj.ga < pCutoff & padj.sol > pCutoff ~ "only gastroc",
      # NES.ga  > 0 &               padj.ga < pCutoff & padj.sol > pCutoff ~ "ga up",
                                  padj.ga > pCutoff & padj.sol < pCutoff ~ "only soleus",
      # NES.sol > 0 &               padj.ga > pCutoff & padj.sol < pCutoff ~ "sol up",
      TRUE ~ "different"
    )
  )

# final plot
p <- ggplot(fgseaRes.combined, aes(x = NES.ga, y = NES.sol, text=pathway)) +
  geom_vline(xintercept = 0) + 
  geom_hline(yintercept = 0) + 
  geom_point(aes(color = diff.exp)) +
  # scale_color_manual(values = c("red", "chartreuse1", "bisque", "royalblue")) +
  labs(x = "gastroc", y = "soleus") +
  # ggrepel::geom_label_repel(max.overlaps = 20) + 
  ggtitle(label = "NES")

plotly::ggplotly(p, tooltip = "all")

barplot

ggplot(fgseaRes.combined, aes(x = diff.exp)) +
  geom_bar(aes(fill = diff.exp))

list of all significant pathways

pathways are sorted here by the sum of the absolute NES of both tissues (abs(NES.ga) + abs(NES.sol))

both down

# pathways significantly down in both tissues, strongest combined NES first
# (the combined table is called `fgseaRes.combined`)
fgseaRes.combined %>% 
  filter(diff.exp == "both down") %>% 
  mutate(NES = abs(NES.ga) + abs(NES.sol)) %>% 
  select(pathway, NES) %>% 
  arrange(desc(NES)) %>%
  rmarkdown::paged_table(options = list(cols.min.print = 2))

both up

# pathways significantly up in both tissues, strongest combined NES first
# (the combined table is called `fgseaRes.combined`)
fgseaRes.combined %>% 
  filter(diff.exp == "both up") %>% 
  mutate(NES = abs(NES.ga) + abs(NES.sol)) %>% 
  select(pathway, NES) %>% 
  arrange(desc(NES)) %>%
  rmarkdown::paged_table(options = list(cols.min.print = 2))

only gastroc

fgseaRes.combined %>% 
  filter(diff.exp == "only gastroc") %>% 
  mutate(NES = abs(NES.ga) + abs(NES.sol)) %>% 
  select(pathway, NES) %>% 
  arrange(desc(NES)) %>%
  rmarkdown::paged_table(options = list(cols.min.print = 2))

only soleus

fgseaRes.combined %>% 
  filter(diff.exp == "only soleus") %>% 
  mutate(NES = abs(NES.ga) + abs(NES.sol)) %>% 
  select(pathway, NES) %>% 
  arrange(desc(NES)) %>%
  rmarkdown::paged_table(options = list(cols.min.print = 2))

currentTODOs

[ ] looking at duplicate entrezgene_ids
[ ] finding optimal maxSize (one sided curve)
[ ] find out biological meaning of significant pathways

---
title: "GSEA analysis"
author: "Nick Diercksen"
date: "Last compiled on `r format(Sys.time())`"
output:
  html_notebook:
    toc: yes
    toc_float: yes
  html_document:
    toc: yes
    df_print: paged
editor_options:
  chunk_output_type: inline
params:
  pCutoff:
    value: 0.01
  fgsea.maxSize:
    label: "parameter for `fgsea`: 'Maximal size of a gene set to test. All pathways above the threshold are excluded.'"
    value: 200
  reevaluate:
    label: Should all calculations and database lookups be done again or just e.g.
      graphics regenerated
    value: no
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(dplyr)
library(fgsea)
library(ggplot2)
library(ggpubr)
library(gridExtra)
library(plotly, include.only = "ggplotly")
```

```{r load_rdata, include=FALSE}

perform_fgsea <- params$reevaluate

load("./data/Robjects/02_annot.RData") # for entrezgene_id
load("./data/Robjects/03_DDS.RData")   # for `stat` values from DESeq results
```

# creating ranks

the `ranks` will be a `named num`, with `entrezgene_id` as name and `stat` (Wald Test) as metric

```{r getting_ranks}
#' Build the ranked gene statistic vector used as `stats` input for fgsea
#'
#' @param res Differential expression result that `as.data.frame()` turns into
#'   a table with the gene ids as row names and a `stat` column.
#' @param annot Annotation table with the columns `GeneID` and `entrezgene_id`.
#' @return Named numeric vector of `stat` values, named by `entrezgene_id` and
#'   sorted in decreasing order. Genes without an `entrezgene_id` or without a
#'   `stat` value are left out.
getRanks <- function(res, annot) {
  # only taking genes which have entrezgene_ids assigned to them
  genes_with_entrez <- annot %>%
    dplyr::select(GeneID, entrezgene_id) %>%
    dplyr::filter(!is.na(entrezgene_id))

  as.data.frame(res) %>%
    tibble::rownames_to_column("GeneID") %>%
    merge(genes_with_entrez, by = "GeneID") %>%
    # a gene without a test statistic carries no ranking information and
    # would put a non-finite value into the fgsea input
    dplyr::filter(!is.na(stat)) %>%
    dplyr::arrange(dplyr::desc(stat)) %>%
    dplyr::select(entrezgene_id, stat) %>%
    tibble::deframe() # creating a named num from two columns
}

ranks.gastroc <- getRanks(res.gastroc, annot)
ranks.soleus <- getRanks(res.soleus, annot)
# TODO: why (again) is the soleus gene count seemingly 300 below gastroc gene count / ranks count
# -> seems because of e.g. the cutoff at DESeq
```

## ranks distribution

```{r ranks_barplot, echo=FALSE}
# no idea how to plot this with ggplot (one column) ...

# ggpubr::ggarrange(
# TODO: prettify
# barplot.file <- "./plots/04_barplot_ranks.png"
# if (!file.exists(barplot.file)) {
#   png(barplot.file)
    par(mfrow = c(1, 2))
    barplot(sort(ranks.gastroc, decreasing = T), main = "Wald Test ranks, gastroc")
    barplot(sort(ranks.soleus, decreasing = T), main = "Wald Test ranks, soleus")
  # dev.off()
# }
# )
```


## duplicate entrezgene_ids

duplicate entrezgene ids (multiple entrez are mapped to the same gene name)

Thus a quick look if any of these genes can be simply omitted. Like if the gene_type is "other" or "tRNA"

```{r include=FALSE}
# Counts how often gene ids / gene names occur repeatedly in `annot` and
# compares both sets of duplicates.
# TODO: do not use annot, but the merge? (actually used genes!)
# first looking at ENSEMBL:
# `duplicated()` flags the second and every later occurrence only, so `n` is
# the number of extra rows per GeneID, not the total number of rows
duplicate_ENSEMBL <- annot[duplicated(annot$GeneID), ] %>%
  dplyr::group_by(GeneID, gene_biotype) %>%
  summarise(n = n())

# looking at gene_names(ext):
# NOTE(review): if genes without a name share one value (e.g. "" or NA) in
# `external_gene_name`, they are all counted as duplicates here -- confirm
duplicate_geneNames <- annot[duplicated(annot$external_gene_name), ] %>% 
  dplyr::group_by(GeneID, gene_biotype) %>%
  summarise(n = n())

# duplicates have always the same gene_biotype! (in no more than one group occurs the same GeneID)
anyDuplicated(duplicate_ENSEMBL$GeneID)   # 0
anyDuplicated(duplicate_geneNames$GeneID) # 0

# are all ENSEMBL duplicates also found among the gene_name duplicates?
sum(duplicate_ENSEMBL$GeneID %in% duplicate_geneNames$GeneID) %>%
  sprintf(
    "%d / %d ENSEMBL duplications are also included in the duplicated gene_names",
    .,
    nrow(duplicate_ENSEMBL)
  )

# number of additional duplicates when gene names are used instead of ENSEMBL
# ids; used in the inline text below the chunk
diff_ENS_gene <- nrow(duplicate_geneNames) - nrow(duplicate_ENSEMBL)
```

Since all ENSEMBL duplicates are also found in the gene_name duplicates, using the ENSEMBL as id for the ranks would reduce the number of duplicates by `r diff_ENS_gene`.

actually most of them are "protein coding" and will not be omitted.
to be continued ...

```{r eval=FALSE, include=FALSE}
# TODO: do a e.g. bar-plot with the distribution of gene_biotypes of the duplicates
#       vs the whole genome
# create the df for plotting

# number of duplicated rows per gene_biotype, for ENSEMBL id duplicates
ENS <- duplicate_ENSEMBL %>%
  dplyr::group_by(gene_biotype) %>%
  summarise(count = sum(n))

# same summary for the gene name duplicates
GN <- duplicate_geneNames %>%
  dplyr::group_by(gene_biotype) %>%
  summarise(count = sum(n))

# stack both summaries; `id_type` records which kind of duplicate a row
# belongs to
biotype_counts <- bind_rows(ENSEMBL = ENS, gene_name = GN, .id = "id_type")

# the previous plot call referred to `df2`, `dose`, `len` and `supp`, none of
# which exist in this notebook
ggplot(data = biotype_counts, aes(x = gene_biotype, y = count, fill = id_type)) +
  geom_bar(stat = "identity", position = "dodge")

```

# loading pathways

Pathways are provided by <http://www.gsea-msigdb.org/gsea/msigdb/mouse/collections.jsp>

For now the canonical pathways (CP) are used. Each of these gene sets represents a biological process. They are composed from the following databases, taking a subset of CP:

| database     | gene sets |
|--------------|-----------|
| BioCarta     | 252       |
| Reactome     | 1249      |
| WikiPathways | 186       |

```{r reading_pathways, include=FALSE}
CGP <- qusage::read.gmt("./data/pathways/m2.cp.v2022.1.Mm.entrez.gmt") # canonical pathways (1687 gene sets)

# previously used pathways
# MH <- qusage::read.gmt("./data/pathways/mh.all.v2022.1.Mm.entrez.gmt") # 50 hallmark genes
# M2 <- qusage::read.gmt("./data/pathways/m2.all.v2022.1.Mm.entrez.gmt") # curated gene set with 2600 genes
```

# applying fgsea

    fgseaRes <- fgsea(
      pathways = CGP,
      stats    = ranks,
      minSize  = 15,
      maxSize  = `r params$fgsea.maxSize`
    )

```{r fgsea_res_load, include=FALSE}
#' Returns the fgsea result for one tissue, cached on disk
#'
#' The result is stored under
#' `./data/Robjects/04_fgsea_results/<tissue>/maxSize_<maxSize>.rds` and read
#' from there on later calls, unless `params$reevaluate` is set.
#' The cache file name depends on `tissue` and `maxSize` only, so a result
#' computed with different `ranks` or `pathways` is not detected as outdated.
#'
#' @param tissue Name of the tissue, used as cache sub directory; needs to be
#'   one of {"gastroc", "soleus"}.
#' @param ranks Named numeric vector passed to `fgsea()` as `stats`.
#' @param maxSize Passed to `fgsea()` as `maxSize`; also part of the cache file
#'   name.
#' @param pathways Gene sets passed to `fgsea()`.
#' @param saveResults If `TRUE`, a newly computed result is written to the
#'   cache.
#' @return The fgsea result, either loaded from the cache or newly computed.
get_fgseaRes <- function(tissue, ranks, maxSize=params$fgsea.maxSize, pathways=CGP, saveResults=TRUE) {
  # the file name must follow the `maxSize` actually used, otherwise a call
  # with a non-default `maxSize` reads or overwrites the default's cache
  fgseaRes.path <- file.path(
    ".", "data", "Robjects",
    "04_fgsea_results",
    tissue,
    paste0("maxSize_", maxSize, ".rds")
  )

  if (file.exists(fgseaRes.path) && !params$reevaluate) {
    message("fgsea result found in directory! Previous version will be loaded ...")
    return(readRDS(file = fgseaRes.path))
  }

  # perform fgsea and save the result
  fgseaRes <- fgsea(
    pathways = pathways,
    stats    = ranks,
    minSize  = 15,
    maxSize  = maxSize
  )

  if (saveResults) {
    fgseaRes.dir <- dirname(fgseaRes.path)
    if (!dir.exists(fgseaRes.dir)) {
      dir.create(fgseaRes.dir, recursive = TRUE)
    }
    saveRDS(fgseaRes, file = fgseaRes.path)
  }
  fgseaRes
}

fgseaRes.gastroc <- get_fgseaRes("gastroc", ranks.gastroc, pathways=CGP)
fgseaRes.soleus <- get_fgseaRes("soleus", ranks.soleus, pathways=CGP)
```

## Enrichment score plot

ordering pathways by padj values and using the sign of `ES` to tell up- from down-regulated pathways

```{r fn_get_top_pathways}
#' Obtain the top pathways ordered by `padj`; the sign of `ES` decides whether
#' a pathway counts as up- or down-regulated
#'
#' @param fgseaRes fgsea result table with the columns `ES` and `padj`.
#' @param up If `TRUE` keep pathways with `ES > 0`, otherwise those with
#'   `ES < 0`.
#' @param pCutoff Only pathways with `padj < pCutoff` are kept.
#' @param n Maximum number of pathways to return.
#' @return The at most `n` rows of `fgseaRes` that pass both filters, sorted by
#'   ascending `padj`.
get_top_pathways <- function(fgseaRes, up = TRUE, pCutoff=params$pCutoff, n=10) {
  # `up` is a single flag, so a plain if/else picks the comparison operator
  .updown <- if (up) `>` else `<`

  fgseaRes %>%
    filter(.updown(ES, 0), padj < pCutoff) %>%
    arrange(padj) %>%
    slice_head(n = n)
}
```

plot for top up and down regulated pathways

```{r}
#' Plot the enrichment curves of the top `n` pathways of an fgsea result
#'
#' @param fgseaRes fgsea result table, passed on to `get_top_pathways()`.
#' @param pathways Named list of gene sets; entries are looked up by pathway
#'   name.
#' @param ranks Named numeric vector of gene statistics, passed to
#'   `plotEnrichment()`.
#' @param n Number of pathways to plot.
#' @param up If `TRUE` plot up-regulated pathways, otherwise down-regulated
#'   ones.
#' @return The arranged figure from `arrange_plts()`, or `NULL` (invisibly)
#'   when no pathway passes `params$pCutoff`.
plot_top_enrichment <- function(fgseaRes, pathways, ranks, n = 9, up = TRUE) {
  # extracting the top n pathways
  top.pathways <- get_top_pathways(fgseaRes, up=up, pCutoff=params$pCutoff, n=n)
  # take the names as a plain vector: `top.pathways[i]` selects a row only for
  # a data.table, for a data.frame it would select a column
  pathway.names <- top.pathways$pathway

  # without this guard an empty result would still be "plotted" once
  if (length(pathway.names) == 0) {
    message("no pathway passes the cutoff, nothing to plot")
    return(invisible(NULL))
  }

  # one enrichment plot per pathway
  # TODO: how can I use facet_wrap for this?
  plot.list <- lapply(pathway.names, function(pathway) {
    plotEnrichment(pathways[[pathway]], ranks) +
      # TODO: adjust yaxis to the same scale
      # TODO: keep axis.text.x only on the lower row
      # TODO: keep axis.text.y only on the right column
      theme(
        axis.title.x = element_blank(),
        axis.title.y = element_blank()
      )
  })

  arrange_plts(plot.list)
}

#' Helper function to arrange the enrichment plots into one labelled figure
#'
#' @param plt.list List of ggplot objects; the shared axis labels are read
#'   from the first one.
#' @return The figure built by `ggpubr::ggarrange()` and
#'   `ggpubr::annotate_figure()`: panels labelled A, B, ... with one common
#'   x- and one common y-axis label.
arrange_plts <- function(plt.list) {
  # `[[` extracts the plot itself; `[` returns a one-element list whose
  # `$labels` is NULL, which left the shared axis labels empty
  first.plt <- if (length(plt.list) > 0) plt.list[[1]] else NULL
  xlab <- first.plt$labels$x
  ylab <- first.plt$labels$y

  # TODO: set axis to the same scale
  # TODO: remove all x-axis labels except lower row

  # seq_along() avoids the c(1, 0) index that 1:0 produces for an empty list
  fig_labels <- LETTERS[seq_along(plt.list)]

  ggpubr::ggarrange(plotlist = plt.list, labels = fig_labels) %>%
    ggpubr::annotate_figure(
      left = ggpubr::text_grob(ylab, rot = 90),
      bottom = ggpubr::text_grob(xlab)
    )
}



# plot_labels <-
#     data.frame("label" = LETTERS[1:10], "pathway" = top.pathways)
# knitr::kable(caption = "plot labels", plot_labels)

```

### gastroc up

```{r}
plot_top_enrichment(fgseaRes.gastroc, CGP, ranks.gastroc, up = TRUE)

# TODO: add plot labels to return argument of plot_top_enrichment (use list probably)
# one letter per pathway that was actually found: a fixed LETTERS[1:9] makes
# data.frame() fail (or recycle) when fewer than nine pathways pass the cutoff
label.pathways <- get_top_pathways(fgseaRes.gastroc, up = TRUE, n = 9)$pathway
plot_labels <- data.frame(
  "label" = LETTERS[seq_along(label.pathways)],
  "pathway" = label.pathways
)
knitr::kable(caption = "plot labels", plot_labels)
```

### gastroc down

```{r}
plot_top_enrichment(fgseaRes.gastroc, CGP, ranks.gastroc, up = FALSE)
# one letter per pathway that was actually found: a fixed LETTERS[1:9] makes
# data.frame() fail (or recycle) when fewer than nine pathways pass the cutoff
label.pathways <- get_top_pathways(fgseaRes.gastroc, up = FALSE, n = 9)$pathway
plot_labels <- data.frame(
  "label" = LETTERS[seq_along(label.pathways)],
  "pathway" = label.pathways
)
knitr::kable(caption = "plot labels", plot_labels)
```

### soleus up

```{r}
plot_top_enrichment(fgseaRes.soleus, CGP, ranks.soleus, up = TRUE)

# one letter per pathway that was actually found: a fixed LETTERS[1:9] makes
# data.frame() fail (or recycle) when fewer than nine pathways pass the cutoff
label.pathways <- get_top_pathways(fgseaRes.soleus, up = TRUE, n = 9)$pathway
plot_labels <- data.frame(
  "label" = LETTERS[seq_along(label.pathways)],
  "pathway" = label.pathways
)
knitr::kable(caption = "plot labels", plot_labels)
```

### soleus down

```{r}
plot_top_enrichment(fgseaRes.soleus, CGP, ranks.soleus, up = FALSE)
# one letter per pathway that was actually found: a fixed LETTERS[1:9] makes
# data.frame() fail (or recycle) when fewer than nine pathways pass the cutoff
label.pathways <- get_top_pathways(fgseaRes.soleus, up = FALSE, n = 9)$pathway
plot_labels <- data.frame(
  "label" = LETTERS[seq_along(label.pathways)],
  "pathway" = label.pathways
)
knitr::kable(caption = "plot labels", plot_labels)
```

## GSEA table plot

### gastroc

top significant pathways:

```{r}
# creating up and down regulated pathway vectors separately to maintain order

topUp <- get_top_pathways(fgseaRes.gastroc, up=T, pCutoff = params$pCutoff, n=10)
topDown <- get_top_pathways(fgseaRes.gastroc, up=F, pCutoff = params$pCutoff, n=10)
topPathways <- bind_rows(topUp, topDown) %>%
  arrange(-NES) %>%
  pull(pathway)

plotGseaTable(
  pathways = CGP[topPathways],
  stats = ranks.gastroc,
  fgseaRes = fgseaRes.gastroc,
  gseaParam = 0.5,
  render = TRUE
) %>%
  ggpubr::as_ggplot() # needed since, for whatever reason only `NULL` gets returned if `plotGseaTable` is rendered inline
```

### soleus

top significant pathways:

```{r}
# creating up and down regulated pathway vectors separately to maintain order

topUp <- get_top_pathways(fgseaRes.soleus, up=T, pCutoff = params$pCutoff, n=10)
topDown <- get_top_pathways(fgseaRes.soleus, up=F, pCutoff = params$pCutoff, n=10)
topPathways <- bind_rows(topUp, topDown) %>%
  arrange(-NES) %>%
  pull(pathway)

plotGseaTable(
  pathways = CGP[topPathways],
  stats = ranks.soleus,
  fgseaRes = fgseaRes.soleus,
  gseaParam = 0.5,
  render = TRUE
) %>%
  ggpubr::as_ggplot() # needed since, for whatever reason only `NULL` gets returned if `plotGseaTable` is rendered inline
```

### most differential regulated pathways, both tissues

using `NES` from the fgsea result and keeping pathways with `padj` below `pCutoff` (`r params$pCutoff`) in at least one tissue yields the following plot:

```{r topDiff.both, warning=FALSE}

# Combine the fgsea results of both tissues (pathways present in both) and
# classify every pathway that is significant in at least one tissue by the
# direction of its NES.
pCutoff <- params$pCutoff
fgseaRes.combined <- merge(
  data.frame(fgseaRes.gastroc[, c("pathway", "NES", "padj")]),
  data.frame(fgseaRes.soleus[, c("pathway", "NES", "padj")]),
  by = "pathway",
  suffixes = c(".ga", ".sol")
) %>%
  filter(padj.ga < pCutoff | padj.sol < pCutoff) %>%
  mutate(
    # explicit significance flags: a padj that is NA or exactly equal to
    # pCutoff counts as "not significant" instead of ending up in "different"
    sig.ga  = !is.na(padj.ga)  & padj.ga  < pCutoff,
    sig.sol = !is.na(padj.sol) & padj.sol < pCutoff,
    diff.exp = case_when(
      NES.ga  < 0 & NES.sol < 0 & sig.ga & sig.sol ~ "both down",
      NES.ga  > 0 & NES.sol > 0 & sig.ga & sig.sol ~ "both up",
      NES.ga  < 0 & NES.sol > 0 & sig.ga & sig.sol ~ "ga down, sol up",
      NES.ga  > 0 & NES.sol < 0 & sig.ga & sig.sol ~ "ga up, sol down",
                                  sig.ga & !sig.sol ~ "only gastroc",
      # NES.ga  > 0 &               sig.ga & !sig.sol ~ "ga up",
                                  !sig.ga & sig.sol ~ "only soleus",
      # NES.sol > 0 &               !sig.ga & sig.sol ~ "sol up",
      TRUE ~ "different"
    )
  ) %>%
  # the helper flags are not part of the result table
  select(-sig.ga, -sig.sol)

# final plot
p <- ggplot(fgseaRes.combined, aes(x = NES.ga, y = NES.sol, text=pathway)) +
  geom_vline(xintercept = 0) + 
  geom_hline(yintercept = 0) + 
  geom_point(aes(color = diff.exp)) +
  # scale_color_manual(values = c("red", "chartreuse1", "bisque", "royalblue")) +
  labs(x = "gastroc", y = "soleus") +
  # ggrepel::geom_label_repel(max.overlaps = 20) + 
  ggtitle(label = "NES")

plotly::ggplotly(p, tooltip = "all")
```

### barplot

```{r diffGenesBarPlot}
ggplot(fgseaRes.combined, aes(x = diff.exp)) +
  geom_bar(aes(fill = diff.exp))
```

### list of all significant pathways
pathways are sorted here by the sum of the absolute NES of both tissues (`abs(NES.ga) + abs(NES.sol)`)

#### both down
```{r}
fgseaRes.combined %>% 
  filter(diff.exp == "both down") %>% 
  mutate(NES = abs(NES.ga) + abs(NES.sol)) %>% 
  select(pathway, NES) %>% 
  arrange(desc(NES)) %>%
  rmarkdown::paged_table(options = list(cols.min.print = 2))
```

#### both up
```{r}
fgseaRes.combined %>% 
  filter(diff.exp == "both up") %>% 
  mutate(NES = abs(NES.ga) + abs(NES.sol)) %>% 
  select(pathway, NES) %>% 
  arrange(desc(NES)) %>%
  rmarkdown::paged_table(options = list(cols.min.print = 2))
```

#### only gastroc
```{r}
fgseaRes.combined %>% 
  filter(diff.exp == "only gastroc") %>% 
  mutate(NES = abs(NES.ga) + abs(NES.sol)) %>% 
  select(pathway, NES) %>% 
  arrange(desc(NES)) %>%
  rmarkdown::paged_table(options = list(cols.min.print = 2))
```

#### only soleus
```{r}
fgseaRes.combined %>% 
  filter(diff.exp == "only soleus") %>% 
  mutate(NES = abs(NES.ga) + abs(NES.sol)) %>% 
  select(pathway, NES) %>% 
  arrange(desc(NES)) %>%
  rmarkdown::paged_table(options = list(cols.min.print = 2))
```

```{r include = FALSE}
saveRDS(fgseaRes.combined, file = "./data/Robjects/04_fgseaRes.combined.rds")
```


# currentTODOs

[ ] looking at duplicate entrezgene_ids\
[ ] finding optimal `maxSize` (one sided curve)\
[ ] find out biological meaning of significant pathways\
